# coding=utf8
import os
import time
from collections import Counter

import tldextract
from selenium.webdriver.common.keys import Keys

from libs.sele import driver_new_session


def search_keyword(keyword, output_path="data/domain.csv"):
    """Tally the hosts found on a search-results page and save them as CSV.

    Despite the parameter name, ``keyword`` is handed directly to
    ``driver.get`` and therefore must be the full URL of the results page,
    not a bare search term.

    Each string yielded by ``get_domain`` is split with ``tldextract`` and
    re-joined from its non-empty subdomain / domain / suffix parts; the
    number of occurrences of every resulting host is then written to
    ``output_path`` as ``host,count`` lines (UTF-8 with BOM, no header, no
    trailing newline).

    Args:
        keyword: URL of the first results page to load.
        output_path: Destination CSV file. Its parent directory is created
            when missing so a long scrape is not lost at the final write.
    """
    driver = driver_new_session()
    domain_statics = Counter()
    try:
        driver.get(keyword)
        for domain in get_domain(driver):
            extracted = tldextract.extract(domain)
            # Read the fields by name instead of iterating the result: the
            # result object is not guaranteed to be a plain tuple of strings
            # across tldextract releases.
            pieces = (extracted.subdomain, extracted.domain, extracted.suffix)
            host = '.'.join(piece for piece in pieces if piece)
            if host:
                domain_statics[host] += 1
    finally:
        # Release the browser window even when scraping fails part-way.
        driver.close()

    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    rows = ('{},{}'.format(host, count) for host, count in domain_statics.items())
    with open(output_path, 'w', encoding='utf-8-sig') as f:
        f.write("\n".join(rows))


def get_domain(driver, delay=2):
    """Yield the leading token of every ``<cite>`` element, page by page.

    For each page the generator prints the 1-based page counter, waits
    ``delay`` seconds, reads the ``innerHTML`` of every ``cite`` element,
    keeps only the text before the first whitespace, removes ``<b>`` /
    ``</b>`` markers and yields the result. Blank entries are skipped.

    Paging is driven by the ``a.pn`` links currently on the page:

    * none               -> stop;
    * exactly one        -> click it on the first page, otherwise stop
      (presumably only a "previous" link is left on the last page);
    * two or more        -> click the second one (presumably "next").

    Args:
        driver: Object exposing ``find_elements(by, value)`` whose elements
            provide ``get_attribute`` and ``click`` (a Selenium WebDriver).
        delay: Seconds to wait before reading each page. Defaults to 2.

    Yields:
        str: One cleaned, non-empty entry per ``cite`` element.
    """
    # Locator strategies are passed as their plain string values (identical
    # to selenium's By.TAG_NAME / By.CSS_SELECTOR) through the generic
    # find_elements(by, value) call, which is available in both Selenium 3
    # and 4; the find_elements_by_* helpers no longer exist in Selenium 4.3+.
    page = 0
    while True:
        page += 1
        print(page)
        time.sleep(delay)

        for cite in driver.find_elements('tag name', 'cite'):
            text = (cite.get_attribute('innerHTML') or '').strip()
            if not text:
                continue
            # Keep only what precedes the first whitespace (e.g. drop a
            # trailing breadcrumb after the URL).
            text = text.split(None, 1)[0]
            text = text.replace('<b>', '').replace('</b>', '')
            if text:
                yield text

        pager = driver.find_elements('css selector', 'a.pn')
        if not pager:
            break
        if len(pager) > 1:
            pager[1].click()
        elif page == 1:
            pager[0].click()
        else:
            break


if __name__ == '__main__':
    # Script entry point: scrape the Google results page whose query
    # string is "Zhou Nutrition" and tally the domains found on it.
    start_url = 'https://www.google.com/search?q=Zhou+Nutrition&safe=active&biw=1280&bih=676&filter=0'
    search_keyword(start_url)
